#!/usr/bin/env bash
# BitDistiller pipeline, step 1: compute asymmetric clipping ranges for
# weight quantization and dump them to a clip cache for later use.
set -euo pipefail

# Run from the directory that contains the BitDistiller checkout.
cd BitDistiller/quantization || exit 1

# int4, group size 128; calibrated on the pile dataset.
# NOTE(review): the dump path encodes int4-g128 — keep it in sync with
# --w_bit/--q_group_size if those flags change.
CUDA_VISIBLE_DEVICES=0 python autoclip.py \
    --model_path /mnt/data/models/llama2-7b-hf \
    --calib_dataset pile --quant_type int --w_bit 4 \
    --q_group_size 128 --run_clip \
    --dump_clip ./clip_cache/hf-llama2-7b/int4-g128.pt

# BitDistiller pipeline, step 2: generate teacher outputs (via vLLM) for
# distillation, then mix the datasets.
# NOTE(review): original had the absolute path /BitDistiller/data/generation,
# which only works with the repo checked out at the filesystem root; the
# relative path below assumes step 1 left us in BitDistiller/quantization —
# confirm against your layout.
cd ../data/generation || exit 1

# vllm — teacher generations for wikitext and alpaca prompts.
python generate_vllm.py --base_model /mnt/data/models/llama2-7b-hf \
    --dataset_name wikitext --out_path ./datasets/hf-llama-2-7b/ --max_sample 3000
python generate_vllm.py --base_model /mnt/data/models/llama2-7b-hf \
    --dataset_name alpaca --out_path ./datasets/hf-llama-2-7b/ --max_sample 5000

# mix_data.py reads hard-coded input/output paths — edit them in the .py
# before running this step.
python mix_data.py

# BitDistiller pipeline, step 3: distillation-aware QAT training.
# NOTE(review): original had `cd train`, but the previous step leaves the cwd
# at BitDistiller/data/generation; train/ lives two levels up — confirm.
cd ../../train || exit 1

# Args: <mixed dataset json> <ckpt output dir> <log dir> <num GPUs/processes>.
# NOTE(review): these paths say int2-g128 while step 1 dumped an int4-g128
# clip cache — verify the bit-width is the one you intend before training.
bash train.sh \
    ../data/datasets/hf-llama-2-7b/mix_wiki_alpaca_8000.json \
    ./ckpts/hf-llama-2-7b/int2-g128/ \
    ./logs/hf-llama-2-7b/int2-g128/ \
    4